library(lubridate)
library(dplyr)
library(geosphere)
library(leaflet)
library(dbscan)
library(MASS)
library(ggplot2)
library(plotly)UFO Sigthings Near Area51- Visualization
Loading the necessary libraries
Loading the Data set and perform pre-processing of data
# Loading the dataset
file_path <- "ufo-sightings-transformed.csv"
ufo_dataset <- read.csv(file_path)
# Overview of the dataset
head(ufo_dataset) X Date_time date_documented Year Month Hour Season Country_Code
1 0 1949-10-10 20:30:00 4/27/2004 1949 10 20 Autumn USA
2 1 1949-10-10 21:00:00 12/16/2005 1949 10 21 Autumn USA
3 2 1955-10-10 17:00:00 1/21/2008 1955 10 17 Autumn GBR
4 3 1956-10-10 21:00:00 1/17/2004 1956 10 21 Autumn USA
5 4 1960-10-10 20:00:00 1/22/2004 1960 10 20 Autumn USA
6 5 1961-10-10 19:00:00 4/27/2007 1961 10 19 Autumn USA
Country Region Locale latitude longitude UFO_shape
1 United States Texas San Marcos 29.88306 -97.941111 Cylinder
2 United States Texas Bexar County 29.38421 -98.581082 Light
3 United Kingdom England Chester 53.20000 -2.916667 Circle
4 United States Texas Edna 28.97833 -96.645833 Circle
5 United States Hawaii Kaneohe 21.41806 -157.803611 Light
6 United States Tennessee Bristol 36.59500 -82.188889 Sphere
length_of_encounter_seconds Encounter_Duration
1 2700 45 minutes
2 7200 1-2 hrs
3 20 20 seconds
4 20 1/2 hour
5 900 15 minutes
6 300 5 minutes
Description
1 This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church. The Baptist Church sit
2 1949 Lackland AFB, TX. Lights racing across the sky & making 90 degree turns on a dime.
3 Green/Orange circular disc over Chester, England
4 My older brother and twin sister were leaving the only Edna theater at about 9 PM,...we had our bikes and I took a different route home
5 AS a Marine 1st Lt. flying an FJ4B fighter/attack aircraft on a solo night exercise, I was at 50ꯠ' in a "clean" aircraft (no ordinan
6 My father is now 89 my brother 52 the girl with us now 51 myself 49 and the other fellow which worked with my father if he's still livi
summary(ufo_dataset) X Date_time date_documented Year
Min. : 0 Length:80328 Length:80328 Min. :1906
1st Qu.:20082 Class :character Class :character 1st Qu.:2001
Median :40164 Mode :character Mode :character Median :2006
Mean :40164 Mean :2004
3rd Qu.:60245 3rd Qu.:2011
Max. :80327 Max. :2014
Month Hour Season Country_Code
Min. : 1.000 Min. : 0.00 Length:80328 Length:80328
1st Qu.: 4.000 1st Qu.:10.00 Class :character Class :character
Median : 7.000 Median :19.00 Mode :character Mode :character
Mean : 6.835 Mean :15.53
3rd Qu.: 9.000 3rd Qu.:21.00
Max. :12.000 Max. :23.00
Country Region Locale latitude
Length:80328 Length:80328 Length:80328 Min. :-82.86
Class :character Class :character Class :character 1st Qu.: 34.13
Mode :character Mode :character Mode :character Median : 39.41
Mean : 38.12
3rd Qu.: 42.79
Max. : 72.70
longitude UFO_shape length_of_encounter_seconds
Min. :-176.66 Length:80328 Min. : 0
1st Qu.:-112.07 Class :character 1st Qu.: 30
Median : -87.90 Mode :character Median : 180
Mean : -86.77 Mean : 9017
3rd Qu.: -78.75 3rd Qu.: 600
Max. : 178.44 Max. :97836000
Encounter_Duration Description
Length:80328 Length:80328
Class :character Class :character
Mode :character Mode :character
# Checking for missing values
colSums(ufo_dataset == "" | is.na(ufo_dataset)) X Date_time
0 0
date_documented Year
0 0
Month Hour
0 0
Season Country_Code
0 259
Country Region
259 566
Locale latitude
457 0
longitude UFO_shape
0 1930
length_of_encounter_seconds Encounter_Duration
0 0
Description
15
# Removing rows with missing or blank values
ufo_dataset <- ufo_dataset[!(apply(ufo_dataset, 1, function(row) any(row == "" | is.na(row)))), ]
# Converting Date_time to datetime
ufo_dataset$Date_time <- ymd_hms(ufo_dataset$Date_time)
# Converting date_documented to date
ufo_dataset$date_documented <- mdy(ufo_dataset$date_documented)
# Converting columns to categorical variables (factors)
ufo_dataset$Season <- as.factor(ufo_dataset$Season)
ufo_dataset$Country_Code <- as.factor(ufo_dataset$Country_Code)
ufo_dataset$Country <- as.factor(ufo_dataset$Country)
ufo_dataset$Region <- as.factor(ufo_dataset$Region)
ufo_dataset$Locale <- as.factor(ufo_dataset$Locale)
ufo_dataset$UFO_shape <- as.factor(ufo_dataset$UFO_shape)Get Area-51 Coordinates.
# Coordinates for Area 51
area51_coords <- c(-115.808, 37.233)
# Function to calculate distance from Area 51
calculate_distance <- function(lon, lat) {
# Converting to miles
dist <- distGeo(c(lon, lat), area51_coords) / 1609.34
return(dist)
}
# Adding a distance column to the dataset
data <- ufo_dataset %>%
mutate(Distance_to_Area51 = mapply(calculate_distance, longitude, latitude))
# Filtering for sightings within 50 miles of Area 51
area51_sightings <- data %>%
filter(Distance_to_Area51 <= 50)Pointing the UFO sigthings near Area 51
leaflet(data = area51_sightings) %>%
addTiles() %>%
addCircleMarkers(
lng = ~longitude,
lat = ~latitude,
popup = ~paste(
"Date and Time:", Date_time, "<br>",
"Season:", Season, "<br>",
"Region:", Region, "<br>",
"Locale:", Locale
),
radius = 5,
color = "red",
fillOpacity = 0.8
) %>%
setView(lng = -115.808, lat = 37.233, zoom = 8)library(plotly)
# Defining Area 51 coordinates
area51_coords <- data.frame(longitude = -115.808, latitude = 37.233)
# Adjusting the density plot and annotating Area 51
gg_density <- ggplot(area51_sightings, aes(x = longitude, y = latitude)) +
geom_density2d() +
stat_density2d(aes(fill = ..level..), geom = "polygon", alpha = 0.4) +
geom_point(data = area51_coords, aes(x = longitude, y = latitude),
color = "red", size = 3) +
annotate("text", x = -115.808, y = 37.233, label = "Area 51",
color = "red", size = 5, hjust = 0, vjust = -1) +
labs(title = "Density of UFO Sightings Near Area 51",
x = "Longitude", y = "Latitude") +
xlim(-116, -115.5) +
ylim(36.5, 37.5) +
theme_minimal()
# Converting to an interactive plot
interactive_density <- ggplotly(gg_density)
# Displaying the plot
interactive_densityTime-series patterns at AREA 51
Extracting Date and Time From the Dataset
area51_sightings$Year <- format(area51_sightings$Date_time, "%Y")
area51_sightings$Month <- format(area51_sightings$Date_time, "%m")
area51_sightings$Day <- format(area51_sightings$Date_time, "%d")
area51_sightings$Hour <- format(area51_sightings$Date_time, "%H")Analysing Sightings by Year
# Grouping by year and count sightings
sightings_by_year <- area51_sightings %>%
group_by(Year) %>%
summarise(Sightings = n())
# Plotting the trend over years
ggplot(sightings_by_year, aes(x = as.numeric(Year), y = Sightings)) +
geom_line(color = "blue", size = 1) +
geom_point(size = 2) +
labs(title = "UFO Sightings Near Area 51 Over the Years",
x = "Year", y = "Number of Sightings") +
theme_minimal()Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Analyze Sightings by Month
# Grouping by month and count sightings
sightings_by_month <- area51_sightings %>%
group_by(Month) %>%
summarise(Sightings = n())
# Plotting the trend over months
ggplot(sightings_by_month, aes(x = Month, y = Sightings)) +
geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
labs(title = "UFO Sightings Near Area 51 by Month",
x = "Month", y = "Number of Sightings") +
theme_minimal()Analyzing Sightings by Hour
# Grouping by hour and count sightings
sightings_by_hour <- area51_sightings %>%
group_by(Hour) %>%
summarise(Sightings = n())
# Plotting the trend over hours
ggplot(sightings_by_hour, aes(x = as.numeric(Hour), y = Sightings)) +
geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
labs(title = "UFO Sightings Near Area 51 by Hour",
x = "Hour of Day", y = "Number of Sightings") +
theme_minimal()Combining Date and Time Trends
# Grouping by hour and month and count sightings
sightings_by_time <- area51_sightings %>%
group_by(Month, Hour) %>%
summarise(Sightings = n())
# Plotting a heatmap
ggplot(sightings_by_time, aes(x = as.numeric(Month), y = as.numeric(Hour), fill = Sightings)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "blue") +
labs(title = "UFO Sightings Near Area 51 by Month and Hour",
x = "Month", y = "Hour of Day", fill = "Sightings") +
theme_minimal()Adding Time Filters to Spatial Data
# Filtering for July evenings (8 PM–10 PM)
july_evenings <- area51_sightings %>%
filter(Month == "07" & Hour >= 20 & Hour <= 22)
# Plotting heat map for July evenings
ggplot(july_evenings, aes(x = longitude, y = latitude)) +
geom_density2d() +
stat_density2d(aes(fill = ..level..), geom = "polygon", alpha = 0.5) +
scale_fill_gradient(low = "lightblue", high = "red") +
labs(title = "UFO Sightings in July Evenings Near Area 51",
x = "Longitude", y = "Latitude") +
theme_minimal()coordinates <- data.frame(
longitude = july_evenings$longitude,
latitude = july_evenings$latitude,
Region = july_evenings$Region,
Locale = july_evenings$Locale
)
# Computing density
density_estimation <- kde2d(coordinates$longitude, coordinates$latitude, n = 100)
# Converting density grid into a data frame
density_data <- as.data.frame(expand.grid(
longitude = density_estimation$x,
latitude = density_estimation$y
))
# Adding density levels
density_data$density <- as.vector(density_estimation$z)
# Step 1: Filter for high-density regions (top 5% density values)
high_density_points <- density_data %>%
filter(density >= quantile(density, 0.95))
# Step 2: Removing duplicates for latitude and longitude
high_density_unique <- high_density_points %>%
distinct(longitude, latitude, .keep_all = TRUE)Ploting UFO Sightings Near Area 51 During July-Evenings
# Perform clustering on latitude and longitude
coordinates <- july_evenings[, c("longitude", "latitude")]
dbscan_result <- dbscan(coordinates, eps = 0.01, minPts = 5)
# Add cluster labels to data
july_evenings$Cluster <- factor(dbscan_result$cluster)
# Plot clusters
ggplot(july_evenings, aes(x = longitude, y = latitude, color = Cluster)) +
geom_point(size = 2, alpha = 0.8) +
labs(title = "UFO Sightings Clusters Near Area 51 During July-eveings.",
x = "Longitude", y = "Latitude", color = "Cluster") +
theme_minimal()UFO Sightings Clusters with Connections Near Area 51 During July-Evenings
# Connect points by cluster (lines)
july_evenings_cluster_lines <- plot_ly(
data = july_evenings,
x = ~longitude,
y = ~latitude,
color = ~Cluster,
type = 'scatter',
mode = 'markers+lines', # Add lines between points
marker = list(size = 10, opacity = 0.8),
text = ~paste(
"Cluster:", Cluster, "<br>",
"Longitude:", longitude, "<br>",
"Latitude:", latitude, "<br>",
"Locale:", Locale
)
) %>%
layout(
title = "UFO Sightings Clusters with Connections Near Area 51 During July-Eveings",
xaxis = list(title = "Longitude"),
yaxis = list(title = "Latitude"),
hovermode = "closest"
)
# Display the interactive plot
july_evenings_cluster_linesPerforming Clustering at UFO Sightings Near Area 51
# Performing clustering on latitude and longitude
coordinates <- area51_sightings[, c("longitude", "latitude")]
dbscan_result <- dbscan(coordinates, eps = 0.01, minPts = 5)
# Adding cluster labels to data
area51_sightings$Cluster <- factor(dbscan_result$cluster)
# Plotting clusters
ggplot(area51_sightings, aes(x = longitude, y = latitude, color = Cluster)) +
geom_point(size = 2, alpha = 0.8) +
labs(title = "UFO Sightings Clusters Near Area 51",
x = "Longitude", y = "Latitude", color = "Cluster") +
theme_minimal()UFO Sightings Clusters with Connections Near Area 51
# Connecting points by cluster (lines)
plotly_cluster_lines <- plot_ly(
data = area51_sightings,
x = ~longitude,
y = ~latitude,
color = ~Cluster,
type = 'scatter',
mode = 'markers+lines',
marker = list(size = 10, opacity = 0.8),
text = ~paste(
"Cluster:", Cluster, "<br>",
"Longitude:", longitude, "<br>",
"Latitude:", latitude
)
) %>%
layout(
title = "UFO Sightings Clusters with Connections Near Area 51",
xaxis = list(title = "Longitude"),
yaxis = list(title = "Latitude"),
hovermode = "closest"
)
# Displaying the interactive plot
plotly_cluster_lines